import requests
import re


def pageIndex(i, timeout=10):
    """Fetch listing page number *i* and return its first ``<h3>`` link.

    Args:
        i: Page number substituted into the gaokaopai.com listing URL.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Without it a stalled connection would block
            the whole script indefinitely.

    Returns:
        A ``(href, text)`` tuple taken from the first
        ``<h3><a href="...">...</a></h3>`` element on the page, or ``None``
        when the page contains no such element.
    """
    url = "http://www.gaokaopai.com/daxue-0-0-0-0-0-0-0--p-" + str(i) + ".html"
    response = requests.get(url, timeout=timeout)
    response.encoding = 'utf-8'

    # Only the first hit is ever returned, so stop at the first match
    # instead of collecting every match on the page.
    link_pattern = re.compile(r' <h3><a href="(.*?)">(.*?)</a></h3>')
    first = link_pattern.search(response.text)
    if first is None:
        return None
    return first.groups()


def page2Index(m, timeout=10):
    """Fetch the page at ``m[0]`` and return its fourth ``<strong>`` link.

    Args:
        m: Sequence whose first element is the URL to fetch (for example
            the tuple returned by ``pageIndex``).
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so a stalled connection cannot hang the script.

    Returns:
        A ``(href, text)`` tuple for the fourth
        ``<a href="..."> ... <strong>...</strong></a>`` match on the page
        (the first three matches are skipped), or ``None`` when the page has
        three or fewer matches.
    """
    url = m[0]
    response = requests.get(url, timeout=timeout)
    print(response.status_code)
    response.encoding = 'utf-8'

    # re.S lets ".*?" span line breaks between the <a> and <strong> tags.
    link_pattern = re.compile(r'<a href="(.*?)">.*?<strong>(.*?)</strong></a>', re.S)
    links = link_pattern.findall(response.text)

    # The first three matches are discarded; only the next one is used.
    if len(links) > 3:
        return links[3]
    return None


def city(m, timeout=10):
    """Query the page at ``m[0]`` once per listed city and print table rows.

    The page is fetched with GET and scanned for
    ``<li data-val="..." data-id="..." name="city"`` entries. For each entry
    a form is POSTed back to the same URL and up to the first 12 six-cell
    ``<td>`` groups found in the reply are printed.

    Args:
        m: Sequence whose first element is the URL to query (for example
            the tuple returned by ``page2Index``).
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error.

    Returns:
        None. Results are written to standard output.
    """
    url = m[0]
    response = requests.get(url, timeout=timeout)
    # Decode as UTF-8 like the other fetchers in this file, so non-ASCII
    # city names survive the round trip into the POST form.
    response.encoding = 'utf-8'

    # Both patterns are loop-invariant, so compile them once up front.
    city_pattern = re.compile(r'<li data-val="(.*?)" data-id="(.*?)" name="city"')
    row_pattern = re.compile(
        r'<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td>'
        r'(.*?)</td>', re.S)

    for data_val, data_id in city_pattern.findall(response.text):
        # Build the "id||value" form field. The original stored the return
        # value of print() here, which is always None, so the city was never
        # actually sent to the server.
        cname = data_id + '||' + data_val
        print(cname)

        data = {
            'cname': cname,
            'st': 2,
            'km': '2||理科'
        }
        reply = requests.post(url, data=data, timeout=timeout)
        reply.encoding = 'utf-8'

        # Slice rather than index 0..11 so a reply with fewer than 12 rows
        # prints what it has instead of raising IndexError.
        for row in row_pattern.findall(reply.text)[:12]:
            print(row)

def main():
    """Crawl listing pages 1 through 29 and print the results for each.

    For every page the first link from ``pageIndex`` is followed with
    ``page2Index`` and the resulting link is handed to ``city``. Previously
    the follow-up steps ran once after the loop, so 28 of the 29 fetched
    pages were thrown away.
    """
    for page_number in range(1, 30):
        entry = pageIndex(page_number)
        # Either lookup returns None when the page has no matching link;
        # skip that page instead of crashing on None[0].
        if entry is None:
            continue

        detail = page2Index(entry)
        if detail is None:
            continue

        city(detail)

# Run the crawl only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == "__main__":
    main()
